Import Library¶

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import july
from july.utils import date_range
import plotly.express as px
import holidays

# calendar_plot() cmap: Union[str, LinearSegmentedColormap, ListedColormap] = "july",

%matplotlib inline
c:\Users\ASUS\.conda\envs\ml\lib\site-packages\scipy\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.4
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"

Eksplorasi Data Analysis¶

Analisis Perusahaannya¶

In [2]:
df_m6 = pd.read_csv('M6_Universe.csv')
print('Ukuran Shape',df_m6.shape)
df_m6.head()
Ukuran Shape (100, 6)
Out[2]:
id class symbol name GICS_sector/ETF_type GICS_industry/ETF_subtype
0 1 Stock ABBV AbbVie Health Care Pharmaceuticals
1 2 Stock ACN Accenture Information Technology IT Consulting & Other Services
2 3 Stock AEP American Electric Power Utilities Electric Utilities
3 4 Stock AIZ Assurant Financials Multi-line Insurance
4 5 Stock ALLE Allegion Industrials Building Products
In [3]:
for i in df_m6.columns:
    panjang_unique = len(df_m6[i].unique())
    if(panjang_unique < df_m6.shape[0]):
        print(i,len(df_m6[i].unique()),df_m6[i].unique())
class 2 ['Stock' 'ETF']
GICS_sector/ETF_type 15 ['Health Care' 'Information Technology' 'Utilities' 'Financials'
 'Industrials' 'Consumer Discretionary' 'Real Estate' 'Materials'
 'Consumer Staples' 'Communication Services' 'Energy' 'Equities'
 'Fixed Income' 'Commodities' 'Volatility']
GICS_industry/ETF_subtype 60 ['Pharmaceuticals' 'IT Consulting & Other Services' 'Electric Utilities'
 'Multi-line Insurance' 'Building Products' 'Semiconductor Equipment'
 'Asset Management & Custody Banks' 'Internet & Direct Marketing Retail'
 'Residential REITs' 'Paper Packaging' 'Consumer Finance'
 'Health Care Equipment' 'Distillers & Vintners'
 'Health Care Distributors' 'Data Processing & Outsourced Services'
 'Technology Distributors' 'Specialty Chemicals' 'Cable & Satellite'
 'Managed Health Care' 'Multi-Utilities'
 'Oil & Gas Exploration & Production' 'Diversified Support Services'
 'Casinos & Gaming' 'General Merchandise Stores' 'Restaurants'
 'Industrial REITs' 'Interactive Media & Services' 'Industrial Machinery'
 'Specialty Stores' 'Property & Casualty Insurance' 'Hotel & Resort REITs'
 'Diversified Banks' 'Food Retail' 'Personal Products'
 'Life & Health Insurance' 'Reinsurance'
 'Environmental & Facilities Services' 'Apparel Retail'
 'Trading Companies & Distributors' 'Research & Consulting Services'
 'Integrated Oil & Gas' 'Large Cap-North America'
 'Small Cap-North America' 'Large Cap-Europe' 'Small Cap-Europe'
 'Large Cap-Asia' 'Large Cap-South America' 'Large Cap-Global' 'Credit-US'
 'Govt-US' 'Govt-Europe' 'Credit-Europe' 'Govt-Global' 'Precious Metals'
 'Diversified Commodity' 'Sector-Global' 'Equity Style-North America'
 'Equity Style-Europe' 'Sector-US' 'US']
In [4]:
sns.countplot(x='class',data=df_m6)
Out[4]:
<AxesSubplot: xlabel='class', ylabel='count'>

Bisa diperhatikan, Bahwa jenis jenis sekuritas pada dataset tersebut memiliki banyak yang sama antara ETF dengan saham tersebut

Analisis Harganya¶

In [5]:
df_price = pd.read_csv('assets_m6.csv')
print(df_price.head())
print('shape',df_price.shape)
  symbol        date    price
0   ABBV  2022/01/31  133.124
1    ACN  2022/01/31  349.726
2    AEP  2022/01/31   87.403
3    AIZ  2022/01/31  149.851
4   ALLE  2022/01/31  121.261
shape (20999, 3)
In [6]:
urutan_tanggal = np.arange(min(df_price.date.astype('datetime64')),max(df_price.date.astype('datetime64')) + np.timedelta64(1,'D'),dtype='datetime64[D]')
urutan_tanggal.shape
Out[6]:
(303,)
In [7]:
kolom_tanggal = np.array(list(map(lambda x : x.replace('/','-'), df_price.date.unique())),dtype='datetime64[D]')
kolom_tanggal.shape
Out[7]:
(216,)
In [8]:
tanggal_kosong = np.setdiff1d(urutan_tanggal,kolom_tanggal)
tanggal_kosong = tanggal_kosong.astype('str')
tanggal_kosong = np.array(list(map(lambda x : x.replace('-','/'),tanggal_kosong)))
tanggal_kosong
Out[8]:
array(['2022/02/05', '2022/02/06', '2022/02/12', '2022/02/13',
       '2022/02/19', '2022/02/20', '2022/02/26', '2022/02/27',
       '2022/03/05', '2022/03/06', '2022/03/12', '2022/03/13',
       '2022/03/19', '2022/03/20', '2022/03/26', '2022/03/27',
       '2022/04/02', '2022/04/03', '2022/04/09', '2022/04/10',
       '2022/04/15', '2022/04/16', '2022/04/17', '2022/04/23',
       '2022/04/24', '2022/04/30', '2022/05/01', '2022/05/07',
       '2022/05/08', '2022/05/14', '2022/05/15', '2022/05/21',
       '2022/05/22', '2022/05/28', '2022/05/29', '2022/06/04',
       '2022/06/05', '2022/06/11', '2022/06/12', '2022/06/18',
       '2022/06/19', '2022/06/25', '2022/06/26', '2022/07/02',
       '2022/07/03', '2022/07/09', '2022/07/10', '2022/07/16',
       '2022/07/17', '2022/07/23', '2022/07/24', '2022/07/30',
       '2022/07/31', '2022/08/06', '2022/08/07', '2022/08/13',
       '2022/08/14', '2022/08/20', '2022/08/21', '2022/08/27',
       '2022/08/28', '2022/09/03', '2022/09/04', '2022/09/10',
       '2022/09/11', '2022/09/17', '2022/09/18', '2022/09/24',
       '2022/09/25', '2022/10/01', '2022/10/02', '2022/10/08',
       '2022/10/09', '2022/10/15', '2022/10/16', '2022/10/22',
       '2022/10/23', '2022/10/29', '2022/10/30', '2022/11/05',
       '2022/11/06', '2022/11/12', '2022/11/13', '2022/11/19',
       '2022/11/20', '2022/11/26', '2022/11/27'], dtype='<U10')
In [9]:
df_price
Out[9]:
symbol date price
0 ABBV 2022/01/31 133.124
1 ACN 2022/01/31 349.726
2 AEP 2022/01/31 87.403
3 AIZ 2022/01/31 149.851
4 ALLE 2022/01/31 121.261
... ... ... ...
20994 XLP 2022/11/29 75.670
20995 XLU 2022/11/29 69.690
20996 XLV 2022/11/29 135.780
20997 XLY 2022/11/29 141.300
20998 XOM 2022/11/29 110.540

20999 rows × 3 columns

In [10]:
df_price_wide = pd.pivot(df_price,index='symbol',columns='date',values='price')
df_price_wide
Out[10]:
date 2022/01/31 2022/02/01 2022/02/02 2022/02/03 2022/02/04 2022/02/07 2022/02/08 2022/02/09 2022/02/10 2022/02/11 ... 2022/11/16 2022/11/17 2022/11/18 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/28 2022/11/29
symbol
ABBV 133.124 133.231 134.806 136.858 136.780 138.609 139.562 139.260 138.784 138.103 ... 151.870 152.16 154.980 157.110 159.790 159.39 NaN 159.62 158.43 158.20
ACN 349.726 349.567 355.551 341.783 343.712 340.210 341.308 351.654 338.658 325.592 ... 291.510 287.14 286.500 287.020 292.890 294.53 NaN 296.40 290.60 287.98
AEP 87.403 85.421 86.901 87.200 86.214 86.833 87.210 87.171 85.230 85.484 ... 89.740 89.07 91.280 93.150 93.710 95.13 NaN 95.47 95.22 94.65
AIZ 149.851 148.613 150.323 149.154 150.401 150.077 153.320 163.656 159.736 159.107 ... 125.067 122.78 124.262 125.803 126.479 126.34 NaN 128.49 125.22 126.39
ALLE 121.261 118.563 118.741 114.720 117.822 119.018 120.263 123.681 119.245 117.447 ... 111.050 110.77 112.960 112.660 113.940 113.99 NaN 114.32 112.25 110.77
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
XLP 74.589 74.521 75.414 75.434 74.511 74.648 74.933 74.953 74.148 73.882 ... 73.910 73.89 74.680 75.390 75.940 76.20 NaN 76.25 75.96 75.67
XLU 67.791 66.890 67.879 67.575 67.037 66.939 66.988 67.282 65.578 65.618 ... 68.620 67.42 68.850 69.160 69.750 70.53 NaN 70.94 70.21 69.69
XLV 129.767 130.113 131.863 131.299 130.824 130.597 131.665 132.684 130.607 128.797 ... 133.020 132.96 134.570 134.630 135.800 136.34 NaN 137.15 136.12 135.78
XLY 183.866 184.980 183.876 178.249 183.319 182.942 185.516 187.972 184.731 179.422 ... 142.530 140.80 140.990 138.950 140.520 142.55 NaN 142.63 141.75 141.30
XOM 73.043 77.726 77.524 76.620 78.284 79.226 77.178 76.809 76.070 77.985 ... 112.910 113.06 112.080 110.970 114.180 113.61 NaN 113.21 109.81 110.54

100 rows × 216 columns

In [11]:
for i in tanggal_kosong:
    df_price_wide[i] = np.NaN
df_price_wide = df_price_wide[sorted(df_price_wide.columns.tolist())]
In [12]:
import plotly.express as px
import plotly.graph_objects as pog
import calendar

grafiknya = pog.Figure()

tanggal = np.array(pd.to_datetime(df_price_wide.columns.to_numpy()).to_numpy(),dtype=np.datetime64)

selector = 100*[False]
button_all = []
sel_num = 0



for i in df_price_wide.index:
    grafiknya.add_trace(
        pog.Line(x=tanggal,y=df_price_wide.loc[i,],name=i)
    )
    
    selector[sel_num] = True
    hasilku = selector
    button_1 = dict(
        label = i,
        method="update",
        args=[
            {"visible":selector[:100]},
            {'title':{'text':'Grafik Harga Saham '+ i}}
        ]
    )
    selector[sel_num] = False
    sel_num += 1
    button_all.append(button_1)

kamus = dict(
    label="All",
    method="update",
    args=[
            {"visible":100*[True]},
            {'title':{'text':'Grafik Harga Saham Seluruhnya'}}
        ]
)

button_all.insert(0,kamus)


grafiknya.update_layout(
    updatemenus=[
        dict(
            buttons = list(
                button_all
            ),
            direction="down",
            pad={"r": 20, "t": 10},
            showactive=True
        )
    ]
)
grafiknya.show()
c:\Users\ASUS\.conda\envs\ml\lib\site-packages\plotly\graph_objs\_deprecations.py:378: DeprecationWarning:

plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


Interpretasi grafik :

  • nilai saham tertinggi berasal dari perusahaan IEFM.L
  • Setengah dari beberapa saham tersebut memiliki perubahan yang sama dan cenderung sejajar
  • Untuk Saham CHTR memiliki grafik yang memotong dibandingkan saham UNH
In [13]:
grafiknya.write_html("assets/grafik_seluruh_saham.html")
In [14]:
import pandas as pd
from datetime import date
import holidays
from workalendar.europe import Germany
In [15]:
rata_rata_saham = df_price_wide.mean()
rata_rata_saham = pd.DataFrame(rata_rata_saham,columns=['price'])
rata_rata_saham['non_nan'] = rata_rata_saham['price'].apply(lambda x : 1 if(np.isnan(x)) else 0)
rata_rata_saham
Out[15]:
price non_nan
date
2022/01/31 122.667010 0
2022/02/01 123.441650 0
2022/02/02 123.941190 0
2022/02/03 121.688530 0
2022/02/04 121.522770 0
... ... ...
2022/11/25 114.457920 0
2022/11/26 NaN 1
2022/11/27 NaN 1
2022/11/28 112.959930 0
2022/11/29 113.610222 0

303 rows × 2 columns

In [16]:
july.calendar_plot(dates=np.array(tanggal,dtype='datetime64[D]').astype(datetime),date_label= True, weeknum_label = False,
                    data=rata_rata_saham['non_nan'],title=False)
plt.suptitle('Calendar Buka Bursa efek 31 Januari 2022 - 29 November 2022',y=1.05)
px.line(x=tanggal,y=df_price_wide.mean(),title="Grafik rata rata seluruh saham")
In [17]:
july.calendar_plot(dates=np.array(tanggal,dtype='datetime64[D]').astype(datetime),date_label= True, weeknum_label = False,
                    data=rata_rata_saham['non_nan'],title=False)
plt.suptitle('Calendar Buka Bursa efek 31 Januari 2022 - 29 November 2022',y=1.05)
plt.savefig('assets/Calendar Buka Bursa efek 31 Januari 2022 - 29 November 2022.jpeg')
In [18]:
hasil = px.line(x=tanggal,y=df_price_wide.mean(),title="Grafik rata rata seluruh saham")
hasil.write_html("assets/Grafik_rata_rata_seluruh_saham.html")

Kalendar yang ditandai merah merupakan waktu bursa efek ditutup:

  • kira kira pada hari weekend saja
  • pada 15 April 2022, itu adalah hari Good Friday yakni hari penyaliban Jesus menurut Agama kristen
In [19]:
hasil = holidays.US(years=2022)

july.calendar_plot(dates=list(hasil.keys()),date_label= True, weeknum_label = False,
                    data=len(list(hasil.keys()))*[1],cmap="spring")
plt.suptitle("Calendar Liburan Negara Amerika Serikat", fontsize="x-large", y=1.03)


px.line(x=tanggal,y=df_price_wide.mean(),title="Grafik rata rata seluruh saham")

Berdasarkan dari grafik ini. Perubahan Kalendar mengalami penurunan dan kenaikan secara dratis pada saat Hari menjelang libur, liburan , dan setelah liburan . hal itu dibuktikan dengan tanggal yang ada di grafik harga saham

sembunyikan¶
In [20]:
july.calendar_plot(dates=list(hasil.keys()),date_label= True, weeknum_label = False,
                    data=len(list(hasil.keys()))*[1],cmap="spring")
plt.suptitle("Calendar Liburan Negara Amerika Serikat", fontsize="x-large", y=1.03)
plt.savefig("assets/Calendar Liburan Negara Amerika Serikat.jpeg")
Tidak Sembunyi¶

Gabungkan Antara harga dengan sahamnya¶

In [21]:
df_price_wide = df_price_wide.reset_index()
# df_price_wide.index.name = None 
df_price_wide = df_price_wide.rename_axis(None, axis=1)

df_price_wide
Out[21]:
symbol 2022/01/31 2022/02/01 2022/02/02 2022/02/03 2022/02/04 2022/02/05 2022/02/06 2022/02/07 2022/02/08 ... 2022/11/20 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/26 2022/11/27 2022/11/28 2022/11/29
0 ABBV 133.124 133.231 134.806 136.858 136.780 NaN NaN 138.609 139.562 ... NaN 157.110 159.790 159.39 NaN 159.62 NaN NaN 158.43 158.20
1 ACN 349.726 349.567 355.551 341.783 343.712 NaN NaN 340.210 341.308 ... NaN 287.020 292.890 294.53 NaN 296.40 NaN NaN 290.60 287.98
2 AEP 87.403 85.421 86.901 87.200 86.214 NaN NaN 86.833 87.210 ... NaN 93.150 93.710 95.13 NaN 95.47 NaN NaN 95.22 94.65
3 AIZ 149.851 148.613 150.323 149.154 150.401 NaN NaN 150.077 153.320 ... NaN 125.803 126.479 126.34 NaN 128.49 NaN NaN 125.22 126.39
4 ALLE 121.261 118.563 118.741 114.720 117.822 NaN NaN 119.018 120.263 ... NaN 112.660 113.940 113.99 NaN 114.32 NaN NaN 112.25 110.77
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 XLP 74.589 74.521 75.414 75.434 74.511 NaN NaN 74.648 74.933 ... NaN 75.390 75.940 76.20 NaN 76.25 NaN NaN 75.96 75.67
96 XLU 67.791 66.890 67.879 67.575 67.037 NaN NaN 66.939 66.988 ... NaN 69.160 69.750 70.53 NaN 70.94 NaN NaN 70.21 69.69
97 XLV 129.767 130.113 131.863 131.299 130.824 NaN NaN 130.597 131.665 ... NaN 134.630 135.800 136.34 NaN 137.15 NaN NaN 136.12 135.78
98 XLY 183.866 184.980 183.876 178.249 183.319 NaN NaN 182.942 185.516 ... NaN 138.950 140.520 142.55 NaN 142.63 NaN NaN 141.75 141.30
99 XOM 73.043 77.726 77.524 76.620 78.284 NaN NaN 79.226 77.178 ... NaN 110.970 114.180 113.61 NaN 113.21 NaN NaN 109.81 110.54

100 rows × 304 columns

In [22]:
df_gabung = df_m6.merge(df_price_wide,on="symbol")
In [23]:
df_gabung_class = df_gabung.groupby('class').mean(numeric_only=True)
df_gabung_class = df_gabung_class.drop('id',axis=1)
df_gabung_class
Out[23]:
2022/01/31 2022/02/01 2022/02/02 2022/02/03 2022/02/04 2022/02/05 2022/02/06 2022/02/07 2022/02/08 2022/02/09 ... 2022/11/20 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/26 2022/11/27 2022/11/28 2022/11/29
class
ETF 83.15824 83.75248 84.23680 83.01748 82.91230 NaN NaN 82.98188 83.13774 84.37112 ... NaN 74.71600 75.47532 75.83896 100.3966 75.96270 NaN NaN 75.28726 75.417440
Stock 162.17578 163.13082 163.64558 160.35958 160.13324 NaN NaN 159.57528 161.21372 163.67890 ... NaN 149.52434 151.48794 152.42196 NaN 152.95314 NaN NaN 150.63260 152.582449

2 rows × 303 columns

In [24]:
import plotly.graph_objs as go
import plotly.express as px

fig = pog.Figure()
fig.add_trace(pog.Line(name="ETF",x=tanggal,y=df_gabung_class.loc['ETF',].to_numpy()))
fig.add_trace(pog.Line(name="Stock",x=tanggal,y=df_gabung_class.loc['Stock',].to_numpy()))
fig.update_layout(
    title={
        'text': "Harga Saham berdasarkan jenis sekuritas",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()
c:\Users\ASUS\.conda\envs\ml\lib\site-packages\plotly\graph_objs\_deprecations.py:378: DeprecationWarning:

plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


Berdasarkan Gambar tersebut . Maka Interpretasinya adalah:

  • Stock memiliki nilai lebih tinggi dibandingkan denmgan ETF
  • ETF lebih stabli meskipun ada titik kritis
  • Saham Lebih flukluatif dibandingkan ETF
In [25]:
fig.write_html('assets/Harga_saham_securitas.html')
In [26]:
df_gabung_kat = df_gabung.groupby("GICS_sector/ETF_type").mean(numeric_only=True)
df_gabung_kat = df_gabung_kat.drop('id',axis=1)
df_gabung_kat
Out[26]:
2022/01/31 2022/02/01 2022/02/02 2022/02/03 2022/02/04 2022/02/05 2022/02/06 2022/02/07 2022/02/08 2022/02/09 ... 2022/11/20 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/26 2022/11/27 2022/11/28 2022/11/29
GICS_sector/ETF_type
Commodities 24.713333 24.823333 24.876667 24.823333 24.950000 NaN NaN 25.186667 25.196667 25.343333 ... NaN 24.536667 24.706667 24.766667 NaN 24.673333 NaN NaN 24.373333 24.626667
Communication Services 347.432667 351.256000 357.092000 330.286667 329.675333 NaN NaN 324.866000 322.837667 329.401000 ... NaN 191.563333 198.106667 200.430000 NaN 199.706667 NaN NaN 196.500000 196.726667
Consumer Discretionary 185.120333 187.177667 185.035167 180.427667 182.011833 NaN NaN 181.078667 183.672667 186.526500 ... NaN 178.526667 179.195000 181.883333 NaN 181.903333 NaN NaN 179.606667 179.090000
Consumer Staples 89.015000 88.979333 90.199000 90.920333 89.661000 NaN NaN 89.242667 89.489000 89.422333 ... NaN 87.656667 88.646667 88.953333 NaN 89.496667 NaN NaN 89.120000 88.650000
Energy 79.009000 82.750000 82.970500 81.876000 83.187500 NaN NaN 84.171500 82.375500 82.967500 ... NaN 118.230000 122.080000 120.175000 NaN 120.010000 NaN NaN 116.825000 117.395000
Equities 91.822081 92.673568 93.308568 91.700541 91.695027 NaN NaN 91.788081 92.095838 93.721000 ... NaN 83.206324 84.145351 84.538676 148.859667 84.726162 NaN NaN 83.852081 84.046838
Financials 175.179429 176.895714 178.864857 176.232429 178.291000 NaN NaN 179.330429 182.681286 184.969857 ... NaN 177.193286 179.582714 180.121429 NaN 181.421429 NaN NaN 177.991429 179.581429
Fixed Income 73.882778 73.821333 73.930333 73.522667 73.005444 NaN NaN 72.995000 72.694556 72.904333 ... NaN 63.065111 63.427556 63.848556 27.702000 63.787444 NaN NaN 63.666222 63.529889
Health Care 169.774500 169.046500 172.042500 176.374333 175.180667 NaN NaN 175.937333 179.662500 180.892500 ... NaN 182.465000 184.190000 185.670000 NaN 187.418333 NaN NaN 186.128333 185.935000
Industrials 167.396143 167.840286 168.677571 165.420571 163.451571 NaN NaN 163.660714 165.887714 169.724143 ... NaN 177.786857 179.345429 179.939714 NaN 180.669571 NaN NaN 177.198571 175.724286
Information Technology 179.572286 180.360286 176.633857 171.873714 172.099286 NaN NaN 170.599429 171.904429 174.537714 ... NaN 148.453714 150.817143 151.708571 NaN 151.784286 NaN NaN 149.430000 147.705714
Materials 133.400333 134.397333 132.772000 130.699667 128.099333 NaN NaN 125.032667 127.609333 129.935000 ... NaN 108.593333 110.973333 111.616667 NaN 111.503333 NaN NaN 108.240000 110.230000
Real Estate 104.247667 104.152000 106.960333 106.445333 105.254667 NaN NaN 104.303667 103.076000 106.335000 ... NaN 78.353333 78.543333 78.330000 NaN 78.906667 NaN NaN 78.156667 94.725000
Utilities 48.026667 47.222667 47.856000 47.890667 47.389333 NaN NaN 47.618667 47.858000 47.864667 ... NaN 50.423333 50.966667 51.683333 NaN 51.913333 NaN NaN 51.726667 51.416667
Volatility 21.410000 19.840000 19.420000 21.780000 21.000000 NaN NaN 20.420000 19.500000 18.710000 ... NaN 15.970000 15.420000 15.080000 NaN 15.160000 NaN NaN 15.720000 15.490000

15 rows × 303 columns

In [27]:
import plotly.graph_objs as go
import plotly.express as px


kategori = df_gabung_kat.index.tolist()

fig = pog.Figure()
for kat in kategori:
    fig.add_trace(pog.Line(name=kat,x=tanggal,y=df_gabung_kat.loc[kat,].to_numpy()))

fig.update_layout(
    title={
        'text': "Harga Saham berdasarkan Kategori",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'})
fig.show()
c:\Users\ASUS\.conda\envs\ml\lib\site-packages\plotly\graph_objs\_deprecations.py:378: DeprecationWarning:

plotly.graph_objs.Line is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Line
  - plotly.graph_objs.layout.shape.Line
  - etc.


Berdasarkan Gambar tersebut . Maka Interpretasinya adalah:

  • Communication Service merupakan kategori dengan nilai tertinggi dibandingkan yang lain
  • Volatility merupakan kategori dengan nilai terendah dibandingkan yang lain
  • Ekuitas memiliki titik kritis di beberapa tempat
In [28]:
fig.write_html('assets/Harga_saham_berdasarkan_kategori.html')
In [29]:
df_gabung
Out[29]:
id class symbol name GICS_sector/ETF_type GICS_industry/ETF_subtype 2022/01/31 2022/02/01 2022/02/02 2022/02/03 ... 2022/11/20 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/26 2022/11/27 2022/11/28 2022/11/29
0 1 Stock ABBV AbbVie Health Care Pharmaceuticals 133.124 133.231 134.806 136.858 ... NaN 157.110 159.790 159.39 NaN 159.62 NaN NaN 158.43 158.20
1 2 Stock ACN Accenture Information Technology IT Consulting & Other Services 349.726 349.567 355.551 341.783 ... NaN 287.020 292.890 294.53 NaN 296.40 NaN NaN 290.60 287.98
2 3 Stock AEP American Electric Power Utilities Electric Utilities 87.403 85.421 86.901 87.200 ... NaN 93.150 93.710 95.13 NaN 95.47 NaN NaN 95.22 94.65
3 4 Stock AIZ Assurant Financials Multi-line Insurance 149.851 148.613 150.323 149.154 ... NaN 125.803 126.479 126.34 NaN 128.49 NaN NaN 125.22 126.39
4 5 Stock ALLE Allegion Industrials Building Products 121.261 118.563 118.741 114.720 ... NaN 112.660 113.940 113.99 NaN 114.32 NaN NaN 112.25 110.77
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 96 ETF XLC Communication Services Select Sector SPDR Fund Equities Sector-US 73.431 74.067 75.546 70.492 ... NaN 49.120 49.850 50.48 NaN 50.20 NaN NaN 49.40 49.42
96 97 ETF XLU Utilities Select Sector SPDR Fund Equities Sector-US 67.791 66.890 67.879 67.575 ... NaN 69.160 69.750 70.53 NaN 70.94 NaN NaN 70.21 69.69
97 98 ETF XLP Consumer Staples Select Sector SPDR Fund Equities Sector-US 74.589 74.521 75.414 75.434 ... NaN 75.390 75.940 76.20 NaN 76.25 NaN NaN 75.96 75.67
98 99 ETF XLB Materials Select Sector SPDR Fund Equities Sector-US 83.023 84.301 84.567 83.092 ... NaN 80.260 82.080 82.40 NaN 82.37 NaN NaN 80.53 80.80
99 100 ETF VXX iPath Series B S&P 500 VIX Short-Term Futures ETN Volatility US 21.410 19.840 19.420 21.780 ... NaN 15.970 15.420 15.08 NaN 15.16 NaN NaN 15.72 15.49

100 rows × 309 columns

Pembersihan Data¶

In [30]:
df_gabung = df_gabung.dropna(how='all',axis=1)
In [31]:
df_harga = df_gabung.iloc[:,6:]
df_harga
Out[31]:
2022/01/31 2022/02/01 2022/02/02 2022/02/03 2022/02/04 2022/02/07 2022/02/08 2022/02/09 2022/02/10 2022/02/11 ... 2022/11/16 2022/11/17 2022/11/18 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/28 2022/11/29
0 133.124 133.231 134.806 136.858 136.780 138.609 139.562 139.260 138.784 138.103 ... 151.870 152.16 154.980 157.110 159.790 159.39 NaN 159.62 158.43 158.20
1 349.726 349.567 355.551 341.783 343.712 340.210 341.308 351.654 338.658 325.592 ... 291.510 287.14 286.500 287.020 292.890 294.53 NaN 296.40 290.60 287.98
2 87.403 85.421 86.901 87.200 86.214 86.833 87.210 87.171 85.230 85.484 ... 89.740 89.07 91.280 93.150 93.710 95.13 NaN 95.47 95.22 94.65
3 149.851 148.613 150.323 149.154 150.401 150.077 153.320 163.656 159.736 159.107 ... 125.067 122.78 124.262 125.803 126.479 126.34 NaN 128.49 125.22 126.39
4 121.261 118.563 118.741 114.720 117.822 119.018 120.263 123.681 119.245 117.447 ... 111.050 110.77 112.960 112.660 113.940 113.99 NaN 114.32 112.25 110.77
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 73.431 74.067 75.546 70.492 70.671 69.439 69.410 71.366 70.343 68.824 ... 50.060 49.68 49.530 49.120 49.850 50.48 NaN 50.20 49.40 49.42
96 67.791 66.890 67.879 67.575 67.037 66.939 66.988 67.282 65.578 65.618 ... 68.620 67.42 68.850 69.160 69.750 70.53 NaN 70.94 70.21 69.69
97 74.589 74.521 75.414 75.434 74.511 74.648 74.933 74.953 74.148 73.882 ... 73.910 73.89 74.680 75.390 75.940 76.20 NaN 76.25 75.96 75.67
98 83.023 84.301 84.567 83.092 81.716 81.313 82.571 84.370 83.879 82.581 ... 80.230 79.46 79.990 80.260 82.080 82.40 NaN 82.37 80.53 80.80
99 21.410 19.840 19.420 21.780 21.000 20.420 19.500 18.710 20.460 23.240 ... 16.740 16.58 16.410 15.970 15.420 15.08 NaN 15.16 15.72 15.49

100 rows × 216 columns

In [32]:
df_gabung = df_gabung.iloc[:,:6]
In [33]:
df_harga = df_harga.interpolate(axis=1)
kolom_harga = df_harga.columns
df_harga.isna().sum()[df_harga.isna().sum() != 0]
Out[33]:
Series([], dtype: int64)
In [34]:
from sklearn.preprocessing import StandardScaler
stdsca = StandardScaler()
df_harga = stdsca.fit_transform(df_harga)
df_harga = pd.DataFrame(df_harga,columns=kolom_harga)
df_harga
Out[34]:
2022/01/31 2022/02/01 2022/02/02 2022/02/03 2022/02/04 2022/02/07 2022/02/08 2022/02/09 2022/02/10 2022/02/11 ... 2022/11/16 2022/11/17 2022/11/18 2022/11/21 2022/11/22 2022/11/23 2022/11/24 2022/11/25 2022/11/28 2022/11/29
0 0.080758 0.075006 0.082538 0.116989 0.118396 0.134412 0.134433 0.115682 0.128689 0.139250 ... 0.335385 0.342154 0.356020 0.374443 0.380340 0.369360 0.368283 0.366693 0.372917 0.371762
1 1.753556 1.732565 1.759496 1.697401 1.724189 1.697992 1.694361 1.728429 1.669788 1.610996 ... 1.502571 1.473097 1.449508 1.455663 1.473517 1.472229 1.475683 1.477276 1.456892 1.438146
2 -0.272341 -0.291313 -0.281387 -0.265981 -0.273996 -0.267154 -0.270360 -0.279839 -0.284231 -0.273797 ... -0.183931 -0.186452 -0.173596 -0.157886 -0.162388 -0.155062 -0.154670 -0.154172 -0.145492 -0.150419
3 0.209940 0.192862 0.200418 0.211818 0.224095 0.223355 0.240812 0.300925 0.290236 0.304127 ... 0.111351 0.095990 0.100624 0.113880 0.106750 0.099641 0.106908 0.113933 0.100549 0.110384
4 -0.010858 -0.037380 -0.039505 -0.053742 -0.028718 -0.017533 -0.014789 -0.002612 -0.021964 -0.022895 ... -0.005810 -0.004637 0.006656 0.004493 0.003765 -0.001146 -0.001095 -0.001120 -0.005822 -0.017963
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 -0.380245 -0.378307 -0.367649 -0.394835 -0.394610 -0.402058 -0.407992 -0.399850 -0.399016 -0.404574 ... -0.515597 -0.516485 -0.520716 -0.524340 -0.522620 -0.519447 -0.520871 -0.521741 -0.521279 -0.522067
96 -0.423803 -0.433297 -0.425894 -0.417332 -0.422810 -0.421448 -0.426719 -0.430860 -0.435755 -0.429740 ... -0.360463 -0.367849 -0.360085 -0.357551 -0.359177 -0.355821 -0.354753 -0.353343 -0.350608 -0.355512
97 -0.371302 -0.374828 -0.368652 -0.356722 -0.364812 -0.361658 -0.365287 -0.372613 -0.369678 -0.364870 ... -0.316246 -0.313639 -0.311613 -0.305699 -0.308337 -0.309548 -0.310037 -0.310229 -0.303450 -0.306375
98 -0.306167 -0.299894 -0.299118 -0.297662 -0.308901 -0.309966 -0.306229 -0.301108 -0.294648 -0.296585 ... -0.263420 -0.266970 -0.267464 -0.265167 -0.257908 -0.258950 -0.259863 -0.260537 -0.265970 -0.264222
99 -0.781999 -0.793792 -0.794028 -0.770510 -0.780057 -0.782241 -0.793903 -0.799676 -0.783631 -0.762398 ... -0.794104 -0.793817 -0.796083 -0.800242 -0.805400 -0.808345 -0.807739 -0.806248 -0.797501 -0.800865

100 rows × 216 columns

In [35]:
np_harga =  df_harga.to_numpy()
df_gabung['seq_price'] = 0
df_gabung['seq_price'] = df_gabung['seq_price'].astype('object')

for i in range(df_gabung.shape[0]):
    df_gabung.at[i,'seq_price'] = np_harga[i,]
df_gabung
Out[35]:
id class symbol name GICS_sector/ETF_type GICS_industry/ETF_subtype seq_price
0 1 Stock ABBV AbbVie Health Care Pharmaceuticals [0.08075840741815742, 0.07500566197616174, 0.0...
1 2 Stock ACN Accenture Information Technology IT Consulting & Other Services [1.753556465328479, 1.7325646305772289, 1.7594...
2 3 Stock AEP American Electric Power Utilities Electric Utilities [-0.27234082530230586, -0.2913129086215058, -0...
3 4 Stock AIZ Assurant Financials Multi-line Insurance [0.2099395466258561, 0.1928620153108893, 0.200...
4 5 Stock ALLE Allegion Industrials Building Products [-0.010858490675997503, -0.037380047990928526,...
... ... ... ... ... ... ... ...
95 96 ETF XLC Communication Services Select Sector SPDR Fund Equities Sector-US [-0.38024534356678646, -0.37830686491863863, -...
96 97 ETF XLU Utilities Select Sector SPDR Fund Equities Sector-US [-0.4238025639369318, -0.43329679131854365, -0...
97 98 ETF XLP Consumer Staples Select Sector SPDR Fund Equities Sector-US [-0.371302212150363, -0.37482833258123344, -0....
98 99 ETF XLB Materials Select Sector SPDR Fund Equities Sector-US [-0.30616717729188686, -0.2998943099825053, -0...
99 100 ETF VXX iPath Series B S&P 500 VIX Short-Term Futures ETN Volatility US [-0.7819989181900717, -0.7937922681355329, -0....

100 rows × 7 columns

In [36]:
df_gabung = df_gabung.drop(['id','symbol','name','GICS_industry/ETF_subtype'],axis=1)
df_gabung
Out[36]:
class GICS_sector/ETF_type seq_price
0 Stock Health Care [0.08075840741815742, 0.07500566197616174, 0.0...
1 Stock Information Technology [1.753556465328479, 1.7325646305772289, 1.7594...
2 Stock Utilities [-0.27234082530230586, -0.2913129086215058, -0...
3 Stock Financials [0.2099395466258561, 0.1928620153108893, 0.200...
4 Stock Industrials [-0.010858490675997503, -0.037380047990928526,...
... ... ... ...
95 ETF Equities [-0.38024534356678646, -0.37830686491863863, -...
96 ETF Equities [-0.4238025639369318, -0.43329679131854365, -0...
97 ETF Equities [-0.371302212150363, -0.37482833258123344, -0....
98 ETF Equities [-0.30616717729188686, -0.2998943099825053, -0...
99 ETF Volatility [-0.7819989181900717, -0.7937922681355329, -0....

100 rows × 3 columns

Windowing¶

Function Windowing¶

In [37]:
def windowing(x,width_window=7,width_output=1):
    x_win = []
    y_win = []
    for i in range(0,(x.shape[0] - width_output - width_window)):
        x_win.append(x[i:i+width_window])
        y_win.append(x[ i + width_window : i + width_window + width_output])
        # y_win.append(x[ i + width_window : i + width_window + width_output])
    
    jawaban = [x_win,y_win]
    return jawaban

Windowing Parameter¶

In [38]:
width_window = 7
width_output = 7

Windowing Application¶

In [39]:
df_gabung['priceku'] = df_gabung.seq_price.apply(windowing,args=(width_window,width_output))
df_gabung['x_price'] = df_gabung.priceku.apply(lambda x : x[0])
df_gabung['y_price'] = df_gabung.priceku.apply(lambda x : x[1])
df_gabung = df_gabung.drop(['priceku','seq_price'],axis=1)
df_gabung
Out[39]:
class GICS_sector/ETF_type x_price y_price
0 Stock Health Care [[0.08075840741815742, 0.07500566197616174, 0.... [[0.11568206821845028, 0.12868870943053295, 0....
1 Stock Information Technology [[1.753556465328479, 1.7325646305772289, 1.759... [[1.7284285942870274, 1.6697876603662474, 1.61...
2 Stock Utilities [[-0.27234082530230586, -0.2913129086215058, -... [[-0.2798392449843054, -0.28423149639119083, -...
3 Stock Financials [[0.2099395466258561, 0.1928620153108893, 0.20... [[0.3009253625204034, 0.2902360103301251, 0.30...
4 Stock Industrials [[-0.010858490675997503, -0.037380047990928526... [[-0.0026121309096905935, -0.02196386370220516...
... ... ... ... ...
95 ETF Equities [[-0.38024534356678646, -0.37830686491863863, ... [[-0.39984950348743575, -0.39901551073312613, ...
96 ETF Equities [[-0.4238025639369318, -0.43329679131854365, -... [[-0.43086006316644776, -0.43575533933118626, ...
97 ETF Equities [[-0.371302212150363, -0.37482833258123344, -0... [[-0.37261275579678543, -0.36967762032061435, ...
98 ETF Equities [[-0.30616717729188686, -0.2998943099825053, -... [[-0.3011077507778687, -0.29464818231689477, -...
99 ETF Volatility [[-0.7819989181900717, -0.7937922681355329, -0... [[-0.7996761514938153, -0.7836310133474043, -0...

100 rows × 4 columns

In [40]:
df_gabung_long = pd.DataFrame({'class':['stock'],'GICS_sector/ETF_type':['Health Care'],'x_price':[10],'y_price':[10]},index=[-1])
df_gabung_long.x_price = df_gabung_long.x_price.astype('object')
df_gabung_long.y_price = df_gabung_long.y_price.astype('object')
df_gabung_long = df_gabung_long.drop([-1],axis=0)
df_gabung_long
Out[40]:
class GICS_sector/ETF_type x_price y_price
In [41]:
bb = 0
for i,row in df_gabung.iterrows():
    x_price = row['x_price']
    y_price = row['y_price']
    for j in range(0,len(x_price)):
        teh = pd.DataFrame({'class':row['class'],'GICS_sector/ETF_type':row['GICS_sector/ETF_type'],'x_price':[0],'y_price':[0]},index=[bb])
        teh.x_price = teh.x_price.astype('object')
        teh.y_price = teh.y_price.astype('object')
        teh.at[bb,'x_price'] = x_price[j].tolist()
        teh.at[bb,'y_price'] = y_price[j].tolist()
        df_gabung_long = pd.concat([df_gabung_long,teh])
        bb += 1
    # teh.at[0,'x_price'] = [12,12]
    # teh.at[0,'y_price'] = [12,12]
print(bb)
df_gabung_long
20200
Out[41]:
class GICS_sector/ETF_type x_price y_price
0 Stock Health Care [0.08075840741815742, 0.07500566197616174, 0.0... [0.11568206821845028, 0.12868870943053295, 0.1...
1 Stock Health Care [0.07500566197616174, 0.08253792017385496, 0.1... [0.12868870943053295, 0.13925031582242625, 0.1...
2 Stock Health Care [0.08253792017385496, 0.11698922038766922, 0.1... [0.13925031582242625, 0.15421535804154152, 0.1...
3 Stock Health Care [0.11698922038766922, 0.11839616830697988, 0.1... [0.15421535804154152, 0.1525872312372209, 0.15...
4 Stock Health Care [0.11839616830697988, 0.13441154018510995, 0.1... [0.1525872312372209, 0.1592136445343873, 0.171...
... ... ... ... ...
20195 ETF Volatility [-0.7609416493957146, -0.7662245026340819, -0.... [-0.7904347265681501, -0.7949206480211323, -0....
20196 ETF Volatility [-0.7662245026340819, -0.7713436978517734, -0.... [-0.7949206480211323, -0.7941038139544377, -0....
20197 ETF Volatility [-0.7713436978517734, -0.768941818972556, -0.7... [-0.7941038139544377, -0.7938168239500139, -0....
20198 ETF Volatility [-0.768941818972556, -0.7588953474828987, -0.7... [-0.7938168239500139, -0.7960828536054464, -0....
20199 ETF Volatility [-0.7588953474828987, -0.7858683036675098, -0.... [-0.7960828536054464, -0.8002422632697174, -0....

20200 rows × 4 columns

In [42]:
kat_enc   = pd.get_dummies(df_gabung_long['GICS_sector/ETF_type'], prefix='kat')
df_gabung_long = pd.concat([df_gabung_long,kat_enc],axis=1)

class_enc   = pd.get_dummies(df_gabung_long['class'], prefix='class')
df_gabung_long = pd.concat([df_gabung_long,class_enc],axis=1)

df_gabung_long = df_gabung_long.drop(['class','GICS_sector/ETF_type'],axis=1)
df_gabung_long
Out[42]:
x_price y_price kat_Commodities kat_Communication Services kat_Consumer Discretionary kat_Consumer Staples kat_Energy kat_Equities kat_Financials kat_Fixed Income kat_Health Care kat_Industrials kat_Information Technology kat_Materials kat_Real Estate kat_Utilities kat_Volatility class_ETF class_Stock
0 [0.08075840741815742, 0.07500566197616174, 0.0... [0.11568206821845028, 0.12868870943053295, 0.1... 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
1 [0.07500566197616174, 0.08253792017385496, 0.1... [0.12868870943053295, 0.13925031582242625, 0.1... 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
2 [0.08253792017385496, 0.11698922038766922, 0.1... [0.13925031582242625, 0.15421535804154152, 0.1... 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
3 [0.11698922038766922, 0.11839616830697988, 0.1... [0.15421535804154152, 0.1525872312372209, 0.15... 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
4 [0.11839616830697988, 0.13441154018510995, 0.1... [0.1525872312372209, 0.1592136445343873, 0.171... 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
20195 [-0.7609416493957146, -0.7662245026340819, -0.... [-0.7904347265681501, -0.7949206480211323, -0.... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0
20196 [-0.7662245026340819, -0.7713436978517734, -0.... [-0.7949206480211323, -0.7941038139544377, -0.... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0
20197 [-0.7713436978517734, -0.768941818972556, -0.7... [-0.7941038139544377, -0.7938168239500139, -0.... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0
20198 [-0.768941818972556, -0.7588953474828987, -0.7... [-0.7938168239500139, -0.7960828536054464, -0.... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0
20199 [-0.7588953474828987, -0.7858683036675098, -0.... [-0.7960828536054464, -0.8002422632697174, -0.... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0

20200 rows × 19 columns

In [43]:
kat_enc   = pd.get_dummies(df_gabung['GICS_sector/ETF_type'], prefix='kat')
df_gabung = pd.concat([df_gabung,kat_enc],axis=1)

class_enc   = pd.get_dummies(df_gabung['class'], prefix='class')
df_gabung = pd.concat([df_gabung,class_enc],axis=1)

df_gabung = df_gabung.drop(['class','GICS_sector/ETF_type'],axis=1)
df_gabung
Out[43]:
x_price y_price kat_Commodities kat_Communication Services kat_Consumer Discretionary kat_Consumer Staples kat_Energy kat_Equities kat_Financials kat_Fixed Income kat_Health Care kat_Industrials kat_Information Technology kat_Materials kat_Real Estate kat_Utilities kat_Volatility class_ETF class_Stock
0 [[0.08075840741815742, 0.07500566197616174, 0.... [[0.11568206821845028, 0.12868870943053295, 0.... 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 1
1 [[1.753556465328479, 1.7325646305772289, 1.759... [[1.7284285942870274, 1.6697876603662474, 1.61... 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1
2 [[-0.27234082530230586, -0.2913129086215058, -... [[-0.2798392449843054, -0.28423149639119083, -... 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1
3 [[0.2099395466258561, 0.1928620153108893, 0.20... [[0.3009253625204034, 0.2902360103301251, 0.30... 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1
4 [[-0.010858490675997503, -0.037380047990928526... [[-0.0026121309096905935, -0.02196386370220516... 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
95 [[-0.38024534356678646, -0.37830686491863863, ... [[-0.39984950348743575, -0.39901551073312613, ... 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
96 [[-0.4238025639369318, -0.43329679131854365, -... [[-0.43086006316644776, -0.43575533933118626, ... 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
97 [[-0.371302212150363, -0.37482833258123344, -0... [[-0.37261275579678543, -0.36967762032061435, ... 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
98 [[-0.30616717729188686, -0.2998943099825053, -... [[-0.3011077507778687, -0.29464818231689477, -... 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0
99 [[-0.7819989181900717, -0.7937922681355329, -0... [[-0.7996761514938153, -0.7836310133474043, -0... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0

100 rows × 19 columns

Modelling¶

Import Library¶

In [44]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dropout,LSTM,Dense,Input, Flatten, Lambda,GRU,concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.utils import plot_model

Desain Architecture NN¶

In [45]:
width_window
Out[45]:
7

Right Branch (Price Only)¶

In [46]:
right_input_layer = tf.keras.Input(shape=(1,width_window,))
lstm_r_1 = LSTM(width_window,return_sequences=True)(right_input_layer)
lstm_r_2 = LSTM(width_window//2)(lstm_r_1)

Left Branch (Structuted)¶

In [47]:
left_input_layer = tf.keras.Input(shape=(17,))
dense_l_1 = Dense(8,activation='relu')(left_input_layer)
dense_l_2 = Dense(4,activation='relu')(dense_l_1)
dense_l_3 = Dense(1,activation='relu')(dense_l_2)

# flat2 = Flatten()(dense1)

Merge Branch¶

In [48]:
nn_mer = concatenate([lstm_r_2,dense_l_3])
outputs = Dense(width_output)(nn_mer)
In [49]:
model_nn = Model(inputs = [left_input_layer, right_input_layer], outputs = outputs)

Plot Model Neural Network¶

In [50]:
plot_model(model_nn,
    show_shapes=True,
    show_dtype=True,
    show_layer_names=True,
    rankdir='TB',
    expand_nested=True,
    dpi=100,
    layer_range=None,
    show_layer_activations=True)
Out[50]:

Prepare Input¶

Left Branch¶

In [51]:
left_df = df_gabung_long.iloc[:,2:].to_numpy()
left_df = left_df.astype('float32')
left_df
Out[51]:
array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 1.],
       ...,
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.],
       [0., 0., 0., ..., 1., 1., 0.]], dtype=float32)

Right Branch¶

In [52]:
right_df = df_gabung_long['x_price']
right_df
Out[52]:
0        [0.08075840741815742, 0.07500566197616174, 0.0...
1        [0.07500566197616174, 0.08253792017385496, 0.1...
2        [0.08253792017385496, 0.11698922038766922, 0.1...
3        [0.11698922038766922, 0.11839616830697988, 0.1...
4        [0.11839616830697988, 0.13441154018510995, 0.1...
                               ...                        
20195    [-0.7609416493957146, -0.7662245026340819, -0....
20196    [-0.7662245026340819, -0.7713436978517734, -0....
20197    [-0.7713436978517734, -0.768941818972556, -0.7...
20198    [-0.768941818972556, -0.7588953474828987, -0.7...
20199    [-0.7588953474828987, -0.7858683036675098, -0....
Name: x_price, Length: 20200, dtype: object
In [53]:
right_df = np.array(right_df.tolist())
right_df = np.reshape(right_df,(right_df.shape[0],1,right_df.shape[1]))
right_df.shape
Out[53]:
(20200, 1, 7)

Y¶

In [54]:
y_output = df_gabung_long['y_price']
y_output = np.array(y_output.tolist())
y_output
Out[54]:
array([[ 0.11568207,  0.12868871,  0.13925032, ...,  0.15258723,
         0.15921364,  0.17109802],
       [ 0.12868871,  0.13925032,  0.15421536, ...,  0.15921364,
         0.17109802,  0.1686085 ],
       [ 0.13925032,  0.15421536,  0.15258723, ...,  0.17109802,
         0.1686085 ,  0.18137622],
       ...,
       [-0.79410381, -0.79381682, -0.79608285, ..., -0.80540014,
        -0.80834464, -0.80773946],
       [-0.79381682, -0.79608285, -0.80024226, ..., -0.80834464,
        -0.80773946, -0.80624771],
       [-0.79608285, -0.80024226, -0.80540014, ..., -0.80773946,
        -0.80624771, -0.79750071]])

Compile, Training ,Running¶

In [56]:
reducelr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.1,
    patience=5,
    verbose=0,
    mode='auto',
    min_delta=0.0000000001,
    cooldown=0,
    min_lr=0,
)


early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=6,
    verbose=0,
    mode='auto',
    baseline=None,
    restore_best_weights=True,
)
In [57]:
model_nn.compile(loss='mean_squared_error',metrics='mean_absolute_error',optimizer='adam')
In [58]:
history = model_nn.fit([left_df,right_df],y_output,epochs=100,validation_split=0.2,callbacks=[reducelr,early_stop])
Epoch 1/100
505/505 [==============================] - 16s 11ms/step - loss: 0.4832 - mean_absolute_error: 0.4330 - val_loss: 0.7644 - val_mean_absolute_error: 0.3287 - lr: 0.0010
Epoch 2/100
505/505 [==============================] - 3s 6ms/step - loss: 0.1021 - mean_absolute_error: 0.1566 - val_loss: 0.4691 - val_mean_absolute_error: 0.2041 - lr: 0.0010
Epoch 3/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0334 - mean_absolute_error: 0.0779 - val_loss: 0.3348 - val_mean_absolute_error: 0.1534 - lr: 0.0010
Epoch 4/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0152 - mean_absolute_error: 0.0510 - val_loss: 0.2637 - val_mean_absolute_error: 0.1326 - lr: 0.0010
Epoch 5/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0090 - mean_absolute_error: 0.0413 - val_loss: 0.2160 - val_mean_absolute_error: 0.1188 - lr: 0.0010
Epoch 6/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0060 - mean_absolute_error: 0.0359 - val_loss: 0.1797 - val_mean_absolute_error: 0.1083 - lr: 0.0010
Epoch 7/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0043 - mean_absolute_error: 0.0324 - val_loss: 0.1525 - val_mean_absolute_error: 0.0996 - lr: 0.0010
Epoch 8/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0034 - mean_absolute_error: 0.0300 - val_loss: 0.1314 - val_mean_absolute_error: 0.0931 - lr: 0.0010
Epoch 9/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0028 - mean_absolute_error: 0.0283 - val_loss: 0.1148 - val_mean_absolute_error: 0.0865 - lr: 0.0010
Epoch 10/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0024 - mean_absolute_error: 0.0266 - val_loss: 0.1009 - val_mean_absolute_error: 0.0815 - lr: 0.0010
Epoch 11/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0022 - mean_absolute_error: 0.0258 - val_loss: 0.0912 - val_mean_absolute_error: 0.0775 - lr: 0.0010
Epoch 12/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0021 - mean_absolute_error: 0.0253 - val_loss: 0.0832 - val_mean_absolute_error: 0.0743 - lr: 0.0010
Epoch 13/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0020 - mean_absolute_error: 0.0246 - val_loss: 0.0773 - val_mean_absolute_error: 0.0723 - lr: 0.0010
Epoch 14/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0019 - mean_absolute_error: 0.0244 - val_loss: 0.0726 - val_mean_absolute_error: 0.0698 - lr: 0.0010
Epoch 15/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0019 - mean_absolute_error: 0.0240 - val_loss: 0.0686 - val_mean_absolute_error: 0.0682 - lr: 0.0010
Epoch 16/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0019 - mean_absolute_error: 0.0238 - val_loss: 0.0655 - val_mean_absolute_error: 0.0667 - lr: 0.0010
Epoch 17/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0236 - val_loss: 0.0639 - val_mean_absolute_error: 0.0659 - lr: 0.0010
Epoch 18/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0019 - mean_absolute_error: 0.0238 - val_loss: 0.0625 - val_mean_absolute_error: 0.0652 - lr: 0.0010
Epoch 19/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0235 - val_loss: 0.0595 - val_mean_absolute_error: 0.0641 - lr: 0.0010
Epoch 20/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0019 - mean_absolute_error: 0.0236 - val_loss: 0.0553 - val_mean_absolute_error: 0.0617 - lr: 0.0010
Epoch 21/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0019 - mean_absolute_error: 0.0236 - val_loss: 0.0602 - val_mean_absolute_error: 0.0636 - lr: 0.0010
Epoch 22/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0232 - val_loss: 0.0574 - val_mean_absolute_error: 0.0628 - lr: 0.0010
Epoch 23/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0233 - val_loss: 0.0546 - val_mean_absolute_error: 0.0636 - lr: 0.0010
Epoch 24/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0232 - val_loss: 0.0522 - val_mean_absolute_error: 0.0598 - lr: 0.0010
Epoch 25/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0231 - val_loss: 0.0508 - val_mean_absolute_error: 0.0592 - lr: 0.0010
Epoch 26/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0230 - val_loss: 0.0491 - val_mean_absolute_error: 0.0584 - lr: 0.0010
Epoch 27/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0231 - val_loss: 0.0487 - val_mean_absolute_error: 0.0579 - lr: 0.0010
Epoch 28/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0232 - val_loss: 0.0488 - val_mean_absolute_error: 0.0583 - lr: 0.0010
Epoch 29/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0230 - val_loss: 0.0480 - val_mean_absolute_error: 0.0580 - lr: 0.0010
Epoch 30/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0227 - val_loss: 0.0472 - val_mean_absolute_error: 0.0576 - lr: 0.0010
Epoch 31/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0232 - val_loss: 0.0450 - val_mean_absolute_error: 0.0563 - lr: 0.0010
Epoch 32/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0227 - val_loss: 0.0459 - val_mean_absolute_error: 0.0564 - lr: 0.0010
Epoch 33/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0230 - val_loss: 0.0464 - val_mean_absolute_error: 0.0570 - lr: 0.0010
Epoch 34/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0230 - val_loss: 0.0466 - val_mean_absolute_error: 0.0567 - lr: 0.0010
Epoch 35/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0227 - val_loss: 0.0460 - val_mean_absolute_error: 0.0563 - lr: 0.0010
Epoch 36/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0228 - val_loss: 0.0432 - val_mean_absolute_error: 0.0551 - lr: 0.0010
Epoch 37/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0228 - val_loss: 0.0472 - val_mean_absolute_error: 0.0574 - lr: 0.0010
Epoch 38/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0229 - val_loss: 0.0429 - val_mean_absolute_error: 0.0557 - lr: 0.0010
Epoch 39/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0227 - val_loss: 0.0468 - val_mean_absolute_error: 0.0577 - lr: 0.0010
Epoch 40/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0226 - val_loss: 0.0447 - val_mean_absolute_error: 0.0560 - lr: 0.0010
Epoch 41/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0227 - val_loss: 0.0455 - val_mean_absolute_error: 0.0567 - lr: 0.0010
Epoch 42/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0017 - mean_absolute_error: 0.0225 - val_loss: 0.0445 - val_mean_absolute_error: 0.0563 - lr: 0.0010
Epoch 43/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0018 - mean_absolute_error: 0.0231 - val_loss: 0.0439 - val_mean_absolute_error: 0.0556 - lr: 0.0010
Epoch 44/100
505/505 [==============================] - 3s 7ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0426 - val_mean_absolute_error: 0.0544 - lr: 1.0000e-04
Epoch 45/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0430 - val_mean_absolute_error: 0.0549 - lr: 1.0000e-04
Epoch 46/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0429 - val_mean_absolute_error: 0.0546 - lr: 1.0000e-04
Epoch 47/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0424 - val_mean_absolute_error: 0.0543 - lr: 1.0000e-04
Epoch 48/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0417 - val_mean_absolute_error: 0.0539 - lr: 1.0000e-04
Epoch 49/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0419 - val_mean_absolute_error: 0.0540 - lr: 1.0000e-04
Epoch 50/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0417 - val_mean_absolute_error: 0.0539 - lr: 1.0000e-04
Epoch 51/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0411 - val_mean_absolute_error: 0.0536 - lr: 1.0000e-04
Epoch 52/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0415 - val_mean_absolute_error: 0.0538 - lr: 1.0000e-04
Epoch 53/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0418 - val_mean_absolute_error: 0.0539 - lr: 1.0000e-04
Epoch 54/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0401 - val_mean_absolute_error: 0.0530 - lr: 1.0000e-04
Epoch 55/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0411 - val_mean_absolute_error: 0.0536 - lr: 1.0000e-04
Epoch 56/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0217 - val_loss: 0.0401 - val_mean_absolute_error: 0.0530 - lr: 1.0000e-04
Epoch 57/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0216 - val_loss: 0.0401 - val_mean_absolute_error: 0.0530 - lr: 1.0000e-04
Epoch 58/100
505/505 [==============================] - 3s 7ms/step - loss: 0.0016 - mean_absolute_error: 0.0216 - val_loss: 0.0409 - val_mean_absolute_error: 0.0536 - lr: 1.0000e-04
Epoch 59/100
505/505 [==============================] - 3s 7ms/step - loss: 0.0016 - mean_absolute_error: 0.0216 - val_loss: 0.0406 - val_mean_absolute_error: 0.0533 - lr: 1.0000e-04
Epoch 60/100
505/505 [==============================] - 3s 5ms/step - loss: 0.0016 - mean_absolute_error: 0.0216 - val_loss: 0.0403 - val_mean_absolute_error: 0.0531 - lr: 1.0000e-04
Epoch 61/100
505/505 [==============================] - 3s 5ms/step - loss: 0.0016 - mean_absolute_error: 0.0216 - val_loss: 0.0406 - val_mean_absolute_error: 0.0534 - lr: 1.0000e-04
Epoch 62/100
505/505 [==============================] - 3s 6ms/step - loss: 0.0016 - mean_absolute_error: 0.0215 - val_loss: 0.0406 - val_mean_absolute_error: 0.0533 - lr: 1.0000e-05
In [59]:
plt.plot(history.history['mean_absolute_error'],label='train')
plt.plot(history.history['val_mean_absolute_error'],label='val')
plt.legend()
plt.show()
In [60]:
print("hasil_predict",model_nn.predict([np.reshape(left_df[0],(1,left_df[0].shape[0])), np.reshape(right_df[0],(1,right_df[0].shape[0],right_df[0].shape[1]))]))
print("hasil_sebenarnya",y_output[0])
1/1 [==============================] - 5s 5s/step
hasil_predict [[0.13602126 0.13611883 0.1364649  0.13602024 0.13592094 0.13575691
  0.13456589]]
hasil_sebenarnya [0.11568207 0.12868871 0.13925032 0.15421536 0.15258723 0.15921364
 0.17109802]

masukkin objeknya harusnya dalam bentuk list jangan menggunakan numpy